*==============================================================================*
* Regression Analysis
*==============================================================================*

ssc install coefplot, replace
ssc install scheme-burd, replace
ssc install winsor2, replace
set scheme burd

*==============================================================================*
cd "P:\ECB business areas\DGR\Staff\papouts\Research\Active\Firm Heterogeneities"
global project "P:\ECB business areas\DGR\Staff\papouts\Research\Active\Firm Heterogeneities"
global data "${project}\Final datasets\"
global output "${project}\Final charts\Revision"
use "${data}\dbtr_level_b_dt.dta", clear

* create new variable for size and drop the original
tab sz_f
clonevar size = sz_f
drop sz_f 
* drop missing size, nace, country code
tab size
drop if size == .
tab nace_code 
drop if nace_code == ""
tab cntry_dbtr
drop if cntry_dbtr == ""
* drop countries
drop if inlist(cntry_dbtr, "CY", "EE", "LT", "LU", "LV", "MT", "SI", "SK")


* encode size, nace, country
* size
gen sz = "Micro"
replace sz = "Large" if size == 1
replace sz = "Medium" if size == 2
replace sz = "Small" if size == 3
* nace
gen nace = "Agriculture"
replace nace = "Mining" if nace_code == "B"
replace nace = "Manufacturing" if nace_code == "C"
replace nace = "Electricity" if nace_code == "D"
replace nace = "Water" if nace_code == "E"
replace nace = "Construction" if nace_code == "F"
replace nace = "Wholesale and retail" if nace_code == "G"
replace nace = "Transporting and storage" if nace_code == "H"
replace nace = "Accommodation and food services" if nace_code == "I"
replace nace = "Information and communication" if nace_code == "J"
replace nace = "Financial and insurance" if nace_code == "K"
replace nace = "Real estate" if nace_code == "L"
replace nace = "Professional activities" if nace_code == "M"
replace nace = "Administrative activities" if nace_code == "N"
replace nace = "Public administration" if nace_code == "O"
replace nace = "Education" if nace_code == "P"
replace nace = "Human health activities" if nace_code == "Q"
replace nace = "Arts, entertainment and recreation" if nace_code == "R"
replace nace = "Other services activities" if nace_code == "S"
replace nace = "Activities of households as employers" if nace_code == "T"
replace nace = "Activities of extraterritorial organisations and bodies" if nace_code == "U"

* check new variables
tab sz
tab nace

* encode to use in regressions
encode sz, gen(sz_f)
encode nace, gen(sctr_f)
encode cntry_dbtr, gen(cntry_f)

* check bank variables
sum i_size
sum b_spec_diff_dbtr

*==============================================================================*
* Main bank share
*==============================================================================*

* mean value
sum mb_shr


*** export regression table

eststo clear
	eststo: reg mb_shr  ibn.cntry_f , noconstant baselevels
	eststo: reg mb_shr ibn.cntry_f i.sz_f i.sctr_f , noconstant baselevels
	eststo: reg mb_shr ibn.cntry_f  i_size hhi_nuts3 b_spec_diff_dbtr i.sz_f i.sctr_f , noconstant baselevels //	
#delimit ;
	esttab using "${output}\Regr_Section3_reg_mb_banks_ordbysz_mean_hhi.tex",  ///
	replace label se star(* 0.10 ** 0.05 *** 0.01) stats(r2 N, fmt(2 %9.0fc) labels(\$R^2\$ Observations)) ///
	indicate("Firm size f.e. = *.sz_f" ///
	"Firm sector f.e. = *.sctr_f") ///
	noomit nobase coeflabels(hhi_nuts3 "Bank concentration" i_size "Bank size" b_spec_diff_dbtr "Bank specialization" ) /// 
	booktabs compress nodepvar nomtitles
	mlabels("Unconditional" "Firm controls" "Firm and bank controls") /// 	
	substitute(\footnotesize \smaller[1] r2 R2)
;
#delimit cr


*** export graph
* unconditional regression
reg mb_shr  ibn.cntry_f , noconstant baselevels
estimates store r_uncond


matrix coeff2 = e(b)'
matrix list coeff2
svmat coeff2
gen n = _n
preserve
	keep n coeff21
	keep if n >= 1 & n <12
	save "${data}\uncond_coeff.dta", replace
	egen mu_uncond = mean(coeff21)
	drop n coeff21
	duplicates drop *, force
	gen n = _n
	save "${data}\uncond.dta", replace
restore

* conditional regression
reg mb_shr ibn.cntry_f i.sz_f i.sctr_f, noconstant baselevels
estimates store r_cond


matrix coeff = e(b)'
matrix list coeff
svmat coeff
replace n = _n
preserve
	keep n coeff1
	keep if n >= 1 & n <12
	egen mu_cond = mean(coeff1)
	drop n coeff1
	duplicates drop *, force
	gen n = _n
	merge 1:1 n using "${data}\uncond.dta", gen(_merge)
	gen DCond_uncond = mu_uncond - mu_cond
	keep DCond_uncond n
	save "${data}\delta.dta", replace
restore

preserve
	keep n coeff1
	keep if n >= 1 & n <12
	replace n  = _n
	merge 1:1 n using "${data}\delta.dta", gen(_merge)
	egen delta = max(DCond_uncond) 
	drop DCond_uncond _merge
	gen coef = coeff1 + delta
	drop coeff1 delta
*	merge 1:1 n using uncond_coeff_w.dta, gen(_merge)
	save "${data}\reg_coefs1.dta", replace
restore

* bank controls with HHI
reg mb_shr ibn.cntry_f i.sz_f i.sctr_f hhi_nuts3 i_size b_spec_diff_dbtr , noconstant baselevels
estimates store r_cond_b

matrix coeff3 = e(b)'
matrix list coeff3
svmat coeff3
replace n = _n
preserve
	keep n coeff31
	keep if n >= 1 & n <12
	egen mu_cond = mean(coeff31)
	drop n coeff31
	duplicates drop *, force
	gen n = _n
	merge 1:1 n using "${data}\uncond.dta", gen(_merge)
	gen DCond_uncond = mu_uncond - mu_cond
	keep DCond_uncond n
	save "${data}\delta_b.dta", replace
restore

*preserve
keep n coeff3
keep if n >= 1 & n <12
replace n  = _n
merge 1:1 n using "${data}\delta_b.dta", gen(_merge)
egen delta = max(DCond_uncond) 
drop DCond_uncond _merge
gen coef3 = coeff3 + delta
drop coeff3 delta
*	merge 1:1 n using uncond_w.dta, gen(_merge2)
save "${data}\reg_coefs2.dta", replace

* merge to get all coefficients
merge 1:1 n using "${data}\reg_coefs1.dta", gen(_merge2)
merge 1:1 n using "${data}\uncond_coeff.dta", gen(_merge3)


gen c = ""
replace c = "AT" if _n == 1
replace c = "BE" if _n == 2
replace c = "DE" if _n == 3
replace c = "ES" if _n == 4
replace c = "FI" if _n == 5
replace c = "FR" if _n == 6
replace c = "GR" if _n == 7
replace c = "IE" if _n == 8
replace c = "IT" if _n == 9
replace c = "NL" if _n == 10
replace c = "PT" if _n == 11

* order as large firms - from python charts
gen order = 0
replace order = 1 if c == "IE"
replace order = 2 if c == "NL"
replace order = 3 if c == "FI"
replace order = 4 if c == "BE"
replace order = 5 if c == "DE"
replace order = 6 if c == "AT"
replace order = 7 if c == "FR"
replace order = 8 if c == "GR"
replace order = 9 if c == "PT"
replace order = 10 if c == "IT"
replace order = 11 if c == "ES"

* list coefficients
* coeff21 - unconditional
* coef - conditional
* coef3 - conditional including bank characteristics
list c coeff21 coef coef3 

* export with the mean value (with HHI)
graph bar coeff21 coef coef3, over(c, sort(order)) ///
ylabel( ,nogextend  ) yscale(range(0.8 1)) exclude0 plotregion(lcolor(black)) ///
ytitle("regression coefficient") bar(1, color(olive_teal) lcolor(black)) ///
bar(2, color(gold) lcolor(black)) ///
bar(3, color(teal) lcolor(black)) legend(label(1 "Unconditional") label(2 "Firm characteristics") label(3 "Firm and bank characteristics") position(12)) ///
yline(0.90, lcolor(black) lstyle("-"))
graph export "${output}\Section3_reg_mb_banks_ordbysz_mean_hhi.pdf", replace /* width(2500)	*/

translate @Results "${output}\3_2_reg_mbshr_banks_logfile.txt", replace
